Run this query_im3_scen("energy") only once to query
from remote IM3 databases. Once a .dat file is created, we
can load the existing project data by
loadProject(proj = "im3scen_energy.dat").
# query the data
# im3_energy <- query_im3_scen("energy")
# load the data
im3_energy <- loadProject(proj = paste0("../", data_dir, "im3scen_energy.dat"))
# scenarios and queries
listScenarios(im3_energy)
[1] "rcp45cooler_ssp3" "rcp45cooler_ssp5" "rcp45hotter_ssp3" "rcp45hotter_ssp5" "rcp85cooler_ssp3" "rcp85cooler_ssp5" "rcp85hotter_ssp3" "rcp85hotter_ssp5"
listQueries(im3_energy)
[1] "USA inputs by tech" "USA outputs by tech" "inputs by subsector (non-electric)" "elec gen by subsector" "USA regional natural gas outputs" "elec energy input by subsector"
# mappings
source_mapping <- read_csv(paste0("../", data_dir, "mappings/source_mapping_en.csv"))
Rows: 85 Columns: 2-- Column specification -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Delimiter: ","
chr (2): input, Source
i Use `spec()` to retrieve the full column specification for this data.
i Specify the column types or set `show_col_types = FALSE` to quiet this message.
target_mapping <- read_csv(paste0("../", data_dir, "mappings/target_mapping_en.csv"))
Rows: 103 Columns: 2-- Column specification -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Delimiter: ","
chr (2): sector, Target
i Use `spec()` to retrieve the full column specification for this data.
i Specify the column types or set `show_col_types = FALSE` to quiet this message.
node_mapping <- read_csv(paste0("../", data_dir, "mappings/node_mapping_en.csv"))
Rows: 20 Columns: 5-- Column specification -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Delimiter: ","
chr (4): label, stage, hex, color_name
dbl (1): node
i Use `spec()` to retrieve the full column specification for this data.
i Specify the column types or set `show_col_types = FALSE` to quiet this message.
# get queries
inputByTechUSA <- getQuery(im3_energy, "USA inputs by tech")
outputByTechUSA <- getQuery(im3_energy, "USA outputs by tech")
inputBySubsectorNonElec <- getQuery(im3_energy, 'inputs by subsector (non-electric)')
elecEnergyInputBySubsector <- getQuery(im3_energy, 'elec energy input by subsector') %>% filter(Units == "EJ") # in case no filtering of ELEC_RPS credits
elecGenBySubsector <- getQuery(im3_energy, 'elec gen by subsector') %>% filter(Units == "EJ") # in case no filtering of ELEC_RPS credits
natGasOutputs <- getQuery(im3_energy, 'USA regional natural gas outputs')
datatables_energy <- list(
"inputByTechUSA" = inputByTechUSA,
"outputByTechUSA" = outputByTechUSA,
"inputBySubsectorNonElec" = inputBySubsectorNonElec,
"elecEnergyInputBySubsector" = elecEnergyInputBySubsector,
"elecGenBySubsector" = elecGenBySubsector,
"natGasOutputs" = natGasOutputs
)
# print column names of each datatable
lapply(datatables_energy, function(x) colnames(x))
$inputByTechUSA
[1] "Units" "scenario" "region" "sector" "subsector" "technology" "input" "year" "value"
$outputByTechUSA
[1] "Units" "scenario" "region" "sector" "subsector" "technology" "output" "year" "value"
$inputBySubsectorNonElec
[1] "Units" "scenario" "region" "sector" "subsector" "input" "year" "value"
$elecEnergyInputBySubsector
[1] "Units" "scenario" "region" "sector" "subsector" "input" "year" "value"
$elecGenBySubsector
[1] "Units" "scenario" "region" "subsector" "year" "value"
$natGasOutputs
[1] "Units" "scenario" "region" "sector" "technology" "output" "year" "value"
# print the first few rows of each datatable
lapply(datatables_energy, function(x) (x))
$inputByTechUSA
$outputByTechUSA
$inputBySubsectorNonElec
$elecEnergyInputBySubsector
$elecGenBySubsector
$natGasOutputs
NA
Let’s process each piece to prepare the format of: scenario, source, target, year, value. Scenario and year could be filtered for each Sankey.
# map non electricity energy flows to major aggregated categories based on the mapping file
inputs_by_subsector_nonelec <- inputBySubsectorNonElec %>%
filter(Units == 'EJ') %>%
filter(!input %in% c('regional corn', 'regional soybean')) %>%
# aggregate all monthly_day combinations to one category e.g., electricity domestic supply_Nov_day to electricity domestic supply
remove_month_day_night_superpeak("sector") %>% remove_month_day_night_superpeak("input") %>%
left_join(source_mapping, by = 'input') %>%
left_join(target_mapping, by = 'sector')
# Note there are NAs in the output due to missing mappings or sectors that are
# not supposed to be targets and inputs that are not supposed# to be sources
# things that were remapped as sources
unique((inputs_by_subsector_nonelec %>% filter(!is.na(Source)))$sector)
[1] "comm cooking" "comm cooling" "comm heating" "comm hot water" "comm lighting" "comm non-building" "comm office" "comm other" "comm refrigeration" "comm ventilation" "delivered biomass" "gas to liquids" "industrial energy use"
[14] "industrial feedstocks" "industry" "oil refining" "regional biomass" "resid clothes dryers" "resid clothes washers" "resid computers" "resid cooking" "resid dishwashers" "resid freezers" "resid furnace fans" "resid heating" "resid hot water"
[27] "resid lighting" "resid other" "resid refrigerators" "resid televisions" "trn_aviation_intl" "trn_freight" "trn_freight_road" "trn_pass" "trn_pass_road" "trn_pass_road_LDV" "trn_pass_road_LDV_4W" "trn_shipping_intl" "biomass liquids"
[40] "cement" "coal to liquids" "process heat cement" "regional biomassOil" "N fertilizer"
unique((inputs_by_subsector_nonelec %>% filter(!is.na(Source)))$subsector)
[1] "electricity" "gas" "biomass" "coal" "refined liquids" "delivered biomass" "gas to liquids" "hydrogen" "industry" "oil refining" "regional biomass" "International Aviation" "Domestic Ship"
[14] "Freight Rail" "Heavy truck" "Light truck" "Medium truck" "Domestic Aviation" "HSR" "Passenger Rail" "Bus" "2W and 3W" "Car" "Large Car and Truck" "International Ship" "biomass liquids"
[27] "cement" "coal to liquids" "regional biomassOil"
unique((inputs_by_subsector_nonelec %>% filter(!is.na(Source)))$input) # look at this
[1] "elect_td_bld" "delivered gas" "delivered biomass" "delivered coal" "refined liquids enduse" "regional biomass" "regional natural gas" "elect_td_ind" "wholesale gas" "H2 enduse" "refined liquids industrial"
[12] "industrial energy use" "industrial feedstocks" "industrial processes" "regional oil" "elect_td_trn" "regional biomassOil" "regional corn for ethanol" "regional coal" "regional oilcrop"
# things there were NOT mapped as sources
unique((inputs_by_subsector_nonelec %>% filter(is.na(Source)))$sector)
[1] "comm cooling" "comm heating" "elect_td_ind" "elect_td_trn" "industrial feedstocks" "resid cooling" "resid heating" "trn_pass" "cement"
unique((inputs_by_subsector_nonelec %>% filter(is.na(Source)))$subsector)
[1] "electricity" "elect_td_ind" "elect_td_trn" "refined liquids" "Cycle" "Walk" "cement"
unique((inputs_by_subsector_nonelec %>% filter(is.na(Source)))$input) # look at this
[1] "electricity domestic supply" "oil-credits" "renewable" "process heat cement"
# things that were remapped as targets
unique((inputs_by_subsector_nonelec %>% filter(!is.na(Target)))$sector) # look at this
[1] "comm cooking" "comm cooling" "comm heating" "comm hot water" "comm lighting" "comm non-building" "comm office" "comm other" "comm refrigeration" "comm ventilation" "delivered biomass" "elect_td_ind" "elect_td_trn"
[14] "gas to liquids" "industrial energy use" "industrial feedstocks" "industry" "oil refining" "regional biomass" "resid clothes dryers" "resid clothes washers" "resid computers" "resid cooking" "resid cooling" "resid dishwashers" "resid freezers"
[27] "resid furnace fans" "resid heating" "resid hot water" "resid lighting" "resid other" "resid refrigerators" "resid televisions" "trn_aviation_intl" "trn_freight" "trn_freight_road" "trn_pass" "trn_pass_road" "trn_pass_road_LDV"
[40] "trn_pass_road_LDV_4W" "trn_shipping_intl" "biomass liquids" "cement" "coal to liquids" "process heat cement" "regional biomassOil" "N fertilizer"
unique((inputs_by_subsector_nonelec %>% filter(!is.na(Target)))$subsector)
[1] "electricity" "gas" "biomass" "coal" "refined liquids" "delivered biomass" "elect_td_ind" "elect_td_trn" "gas to liquids" "hydrogen" "industry" "oil refining" "regional biomass"
[14] "International Aviation" "Domestic Ship" "Freight Rail" "Heavy truck" "Light truck" "Medium truck" "Cycle" "Domestic Aviation" "HSR" "Passenger Rail" "Walk" "Bus" "2W and 3W"
[27] "Car" "Large Car and Truck" "International Ship" "biomass liquids" "cement" "coal to liquids" "regional biomassOil"
unique((inputs_by_subsector_nonelec %>% filter(!is.na(Target)))$input)
[1] "elect_td_bld" "delivered gas" "electricity domestic supply" "delivered biomass" "delivered coal" "refined liquids enduse" "regional biomass" "regional natural gas" "elect_td_ind" "wholesale gas" "H2 enduse"
[12] "refined liquids industrial" "oil-credits" "industrial energy use" "industrial feedstocks" "industrial processes" "regional oil" "renewable" "elect_td_trn" "regional biomassOil" "regional corn for ethanol" "process heat cement"
[23] "regional coal" "regional oilcrop"
# things that were NOT remapped as targets
unique((inputs_by_subsector_nonelec %>% filter(is.na(Target)))$sector) # look at this
character(0)
unique((inputs_by_subsector_nonelec %>% filter(is.na(Target)))$subsector)
character(0)
unique((inputs_by_subsector_nonelec %>% filter(is.na(Target)))$input)
character(0)
# check for unmatched sources
inputs_by_subsector_nonelec_unmatched_source <- inputs_by_subsector_nonelec %>%
filter(is.na(Source)) %>%
select(scenario, sector, subsector, input, Source, Target) %>%
unique
unique(inputs_by_subsector_nonelec_unmatched_source$input)
[1] "electricity domestic supply" "oil-credits" "renewable" "process heat cement"
unmatched_sources <- c("electricity domestic supply", "oil-credits","renewable", "process heat cement","process heat dac")
if(! all(inputs_by_subsector_nonelec_unmatched_source$input %in% unmatched_sources )){
unmatched <- setdiff(inputs_by_subsector_nonelec_unmatched_source$input, unmatched_sources)
stop(paste0("Unmatched Sources in inputs by subsector nonelec. Check Source mapping file against gcam data: ", paste(unmatched, collapse = ' - ')))
}
# check for unmatched targets
inputs_by_subsector_nonelec_unmatched_target <- inputs_by_subsector_nonelec %>%
filter(is.na(Target)) %>%
select(scenario, sector, subsector, input, Source, Target) %>%
unique
unique(inputs_by_subsector_nonelec_unmatched_target$sector)
character(0)
unmatched_targets <- c("H2 central production",
"H2 liquid truck",
"H2 pipeline",
"H2 wholesale delivery" #all intermediate hydrogen markets that are double counting - only want H2 industrial and H2 MHDV
)
if(! all(inputs_by_subsector_nonelec_unmatched_target$sector %in% unmatched_targets)){
unmatched <- setdiff(inputs_by_subsector_nonelec_unmatched_target$sector, unmatched_targets)
stop(paste0("Unmatched Sources in inputs by subsector nonelec. Check Source mapping file against gcam data: ", paste(unmatched, collapse = ' - ')))
}
Get other flows such as gas processing and backup electricity
gas_processing_flows <- inputByTechUSA %>%
filter(sector == "gas processing") %>%
left_join(source_mapping, by = "input") %>%
left_join(target_mapping, by = "sector") %>%
group_by(scenario, Units, year, Source, Target) %>%
summarize(value = sum(value)) %>%
ungroup()
`summarise()` has grouped output by 'scenario', 'Units', 'year', 'Source'. You can override using the `.groups` argument.
backup <- inputByTechUSA %>%
filter(sector %in% c("backup_electricity", "csp_backup")) %>%
left_join(source_mapping, by = "input") %>%
left_join(target_mapping, by = "sector") %>%
group_by(scenario, Units, year, Source, Target) %>%
summarize(value = sum(value)) %>%
ungroup()
`summarise()` has grouped output by 'scenario', 'Units', 'year', 'Source'. You can override using the `.groups` argument.
elec_energy_by_subsector <- elecEnergyInputBySubsector %>%
filter(Units == 'EJ') %>%
filter(!input %in% c('backup_electricity', 'csp_backup'),
!subsector %in% c("nuclear", "geothermal")) %>% #don't want to double count electricity from backup, and nuclear and geothermal are reported from output
left_join(target_mapping, by = 'sector') %>%
left_join(source_mapping, by = 'input')
#hydropower is only available as an output. In the "direct equivalent" reporting convention used here, input = output
hydro_power <- elecEnergyInputBySubsector %>%
filter(subsector == 'hydro') %>%
mutate(Source = 'Hydropower',
Target = 'Electricity')
# nuclear's reported thermal inputs assume a 3:1 conversion, so for "direct equivalent" reporting we use the output
nuclear <- elecEnergyInputBySubsector %>%
filter(subsector == 'nuclear') %>%
mutate(Source = 'Nuclear',
Target = 'Electricity')
# geothermal's reported thermal inputs assume a 10:1 conversion, so for "direct equivalent" reporting we use the output
geothermal <- elecEnergyInputBySubsector %>%
filter(subsector == 'geothermal') %>%
mutate(Source = 'Geothermal',
Target = 'Electricity')
# put everything together
all_energy <- inputs_by_subsector_nonelec %>%
bind_rows(gas_processing_flows) %>%
bind_rows(backup) %>%
bind_rows(elec_energy_by_subsector) %>%
bind_rows(hydro_power) %>%
bind_rows(nuclear) %>%
bind_rows(geothermal)
Source_Target_all <- all_energy %>%
group_by(scenario, Units, Source, Target, year) %>%
summarise(value = sum(value)) %>%
filter( Source != Target) %>%
filter( Target != 'Biomass') %>%
ungroup()
`summarise()` has grouped output by 'scenario', 'Units', 'Source', 'Target'. You can override using the `.groups` argument.
datatable(Source_Target_all, filter = 'top', rownames = FALSE)
scenario_name <- "rcp45cooler_ssp3"
plot_scenario_name <- 'RCP 4.5 Cooler SSP3'
select_year <- '2050'
gcam_data_unit <- 'EJ'
# sankey formatting
link_alpha <- .5
# source/target mapping
node_mapping_in <- node_mapping
# GCAM data
gcam_data <- Source_Target_all %>%
filter(scenario == scenario_name) %>% filter( year == select_year) %>% select(-scenario)
all_links <- c(gcam_data$Source, gcam_data$Target) %>% unique
node_mapping <- node_mapping_in %>% filter(label %in% all_links)
node_mapping$node <- 0:(nrow(node_mapping)-1)
# process node data
links_data <- gcam_data %>%
select(Source, Target, value) %>%
mutate(Target = ifelse(str_detect(Target, 'Ind'), 'Industry', Target)) %>%
group_by(Source, Target) %>%
summarize(value = sum(value)) %>%
ungroup() %>%
rename(Source_label = Source,
Target_label = Target) %>%
left_join(node_mapping %>% select(label, node), by = c('Source_label' = 'label')) %>%
rename(Source_node = node) %>%
left_join(node_mapping %>% select(label, node), by = c('Target_label' = 'label')) %>%
rename(Target_node = node) %>%
left_join(node_mapping %>% select(label, stage, hex, color_name), by = c('Source_label' = 'label')) %>%
mutate(rgb = apply(FUN = paste, MARGIN = 2, X = col2rgb(hex), collapse = ',')) %>%
mutate(rgba = paste0('rgba(', rgb, ', ', link_alpha,')')) %>%
mutate(link_label = paste(Source_label, round(value, digits = 1),'EJ')) %>%
filter(value>0) %>%
arrange(Source_node)
`summarise()` has grouped output by 'Source'. You can override using the `.groups` argument.
datatable(links_data, filter = 'top', rownames = FALSE, options = list(pageLength = 10, scrollX = TRUE))
# process node percent labels
# source
source_sum <- links_data %>%
select(Source_label, value) %>%
left_join(node_mapping %>% select(label, stage), by = c('Source_label' = 'label')) %>%
rename(label=Source_label) %>%
filter(tolower(stage) == 'source') %>%
group_by(label, stage) %>%
summarize(node_sum = sum(value))
`summarise()` has grouped output by 'label'. You can override using the `.groups` argument.
source_total <- source_sum %>%
pull(node_sum) %>% sum
source_percent <- source_sum %>%
mutate(percent = node_sum/source_total*100) %>%
left_join(node_mapping) %>%
arrange(node) %>%
mutate(x = .01) %>%
mutate(csum_norm = source_total)
Joining with `by = join_by(label, stage)`
source_percent$csum <- cumsum(source_percent$node_sum)
source_percent$start <- lag(source_percent$csum)
# target
target_sum <- links_data %>%
select(Target_label, value) %>%
left_join(node_mapping %>% select(label, stage), by = c('Target_label' = 'label')) %>%
rename(label=Target_label) %>%
filter(stage == 'target') %>%
group_by(label, stage) %>%
summarize(node_sum = sum(value))
`summarise()` has grouped output by 'label'. You can override using the `.groups` argument.
target_total <- target_sum %>%
pull(node_sum) %>% sum
target_percent <- target_sum %>%
mutate(percent = node_sum/target_total*100) %>%
left_join(node_mapping) %>%
arrange(node) %>%
mutate(x = .95) %>%
mutate(csum_norm = target_total)
Joining with `by = join_by(label, stage)`
target_percent$csum <- cumsum(target_percent$node_sum)
target_percent$start <- lag(target_percent$csum)
# Intermediate Carriers Flows in
intermediate_nodes <- node_mapping %>% filter(stage == 'mid') %>% pull(label)
intermediate_flows_in_total <- links_data %>%
filter(Target_label %in% intermediate_nodes) %>%
group_by(Target_label) %>%
summarize(node_sum = sum(value))
intermediate_percent <- intermediate_flows_in_total %>%
rename(label = Target_label) %>%
mutate(stage = 'mid') %>%
mutate(percent =node_sum/source_total*100) %>%
left_join(node_mapping)
Joining with `by = join_by(label, stage)`
intermediate_total <- intermediate_percent %>% pull(node_sum) %>% sum
intermediate_flows_out_total <- links_data %>%
filter(Source_label %in% intermediate_nodes) %>%
group_by(Source_label) %>%
summarize(value = sum(value))
# process node locations
# final node info
nodes_data <- bind_rows(source_percent, intermediate_percent, target_percent) %>%
arrange(node)%>%
replace_na(list(start = 0)) %>%
mutate(mid_point = (start+csum)/2) %>%
mutate(y = mid_point/csum_norm) %>%
mutate(y = ifelse(label == 'Gas', 0.3,
ifelse(label == 'Liquid Fuels', 0.2,
ifelse(label == 'Electricity', 0.6,
ifelse(label == 'Hydrogen',0.9,y))))) %>%
mutate(x = ifelse(label == 'Gas', 0.35,
ifelse(label == 'Liquid Fuels', 0.5,
ifelse(label == 'Electricity', 0.6,
ifelse(label == 'Hydrogen',0.7,x))))) %>%
mutate(node_label = ifelse(is.na(node_sum), label,
paste0(label, ' ',round(node_sum, digits = 1) , gcam_data_unit,
' ', round(percent, digits = 1),'%')))
# Check that Source and Targets in Links are in the node mapping
if( any(is.na(links_data$Source_node)) ) stop("Check Source number mapping - NA's")
if( any(is.na(links_data$Target_node)) ) stop("Check Target number mapping - NA's")
datatable(nodes_data, filter = 'top', rownames = FALSE, options = list(pageLength = 20, scrollX = TRUE))
# save files for Kendall
write_csv(Source_Target_all, paste0("../", data_dir, 'allenergy_source_target.csv'))
write_csv(nodes_data, paste0("../", data_dir, 'allenergy_nodes_data.csv'))
write_csv(links_data, paste0("../", data_dir, 'allenergy_links_data.csv'))
# plot sankey
sankey_figure <- plot_ly(
type = "sankey",
# arrangement = "snap",
domain = list(x = c(0,1),y = c(0,1)),
orientation = "h",
valueformat = ".0f",
valuesuffix = gcam_data_unit,
# Nodes
node = list( label = nodes_data %>% pull(node_label),
color = nodes_data %>% pull(hex),
x = nodes_data %>% pull(x),
y = nodes_data %>% pull(y),
pad = 3,
thickness = 15,
line = list(color = "black",width = 0.5)),
# Links
link = list(source = links_data$Source_node,
target = links_data$Target_node,
value = links_data$value,
color = links_data$rgba)
)
# add Formatting
plot_title <- paste0('Energy - ', plot_scenario_name, ' - ',select_year)
sankey_figure <- sankey_figure %>% layout(
title = plot_title,
font = list(size = 11),
xaxis = list(showgrid = F, zeroline = F),
yaxis = list(showgrid = F, zeroline = F))
sankey_figure
NA
NA